07-TIMCI-tflow Audit Analysis

Lucas Silbernagel

23 September, 2021

library(dplyr)
library(readr)
library(ggplot2)
library(openxlsx)
library(knitr)
library(tibble)
library(stringr)
library(stringi)
library(readxl)
library(lubridate)
library(shiny)
library(plotly)

1 Loading the Data and Removal of Training Data

# Unzip and extract ODK data from ODK zip
df <- as.data.frame(extract_data_from_odk_zip(params$file_path_zip, params$file_name_csv))

# Formatting dates from integer (in ms) to time stamp
df$start <- format_date_ms(df$start)
df$end <- format_date_ms(df$end)

# filtering for events that occurred after 18th July 21
df <- subset(df, as.Date(start) > as.Date("18.07.2021", "%d.%m.%Y"))

2 Deriving New Features

2.1 Time Spent per Event

# subtracting end from start date
df$time_spent = round(as.numeric(df$end - df$start))

2.2 Question

# splitting the node strings so that only the question name remains 
df$question = sapply(df$node, create_question)

2.3 Question Decoded

df <- decode_question(df, df$question, params$codebook)

2.4 Categorical Answers Decoded

df <- decode_categories(df, params$codebook)

2.5 Time until a Response was Changed + Stream of Answer Changes

df <- df %>%
# bringing the data in the right order   
  arrange(`instance ID`, node, start) %>%
# adding two empty columns to store the new features in
  add_column(time_till_change=NA) %>%
  add_column(changed_from=NA)

# iterating over the df and computing the time it took until an answer was changed + adding what the question was before 
for (i in 1:nrow(df)){
  if (df$`old-value`[i]==df$`new-value`[i-1] && !is.na(df$`old-value`[i]) && !is.na(df$`new-value`[i-1]) ){
    df$time_till_change[i] <- round(as.numeric(df$start[i]-df$end[i-1]))
  } else{
    next
  }
}

2.6 Preview and Summary of the Final Data

head(df)
instance ID event node start end latitude longitude accuracy old-value new-value time_spent question question_decoded new_value_decoded old_value_decoded time_till_change changed_from
uuid:04bcbdca-b5b6-4189-992a-5ae3765c5441 group questions /data/child_identification 2021-08-17 12:54:01 2021-08-17 12:55:20 NA NA NA NA NA 79 child_identification NA NA NA NA NA
uuid:04bcbdca-b5b6-4189-992a-5ae3765c5441 group questions /data/child_identification 2021-08-17 22:14:47 2021-08-17 22:14:49 NA NA NA NA NA 2 child_identification NA NA NA NA NA
uuid:04bcbdca-b5b6-4189-992a-5ae3765c5441 group questions /data/child_identification 2021-08-17 22:15:37 2021-08-17 22:15:39 NA NA NA NA NA 2 child_identification NA NA NA NA NA
uuid:04bcbdca-b5b6-4189-992a-5ae3765c5441 question /data/child_identification/a1_a_4a 2021-08-17 12:54:01 2021-08-17 12:55:20 NA NA NA NA T-F0010-P0193 79 a1_a_4a If QR code scanning is not possible, please manually enter the participant identification code T-F0010-P0193 NA NA NA
uuid:04bcbdca-b5b6-4189-992a-5ae3765c5441 group questions /data/front_page 2021-08-17 08:32:03 2021-08-17 08:32:05 NA NA NA NA NA 2 front_page NA NA NA NA NA
uuid:04bcbdca-b5b6-4189-992a-5ae3765c5441 group questions /data/front_page 2021-08-18 07:55:33 2021-08-18 07:55:34 NA NA NA NA NA 2 front_page NA NA NA NA NA
summary(df)
##  instance ID           event               node               start                    
##  Length:2599        Length:2599        Length:2599        Min.   :2021-07-31 13:17:06  
##  Class :character   Class :character   Class :character   1st Qu.:2021-08-18 09:47:06  
##  Mode  :character   Mode  :character   Mode  :character   Median :2021-08-19 08:55:06  
##                                                           Mean   :2021-08-17 12:17:01  
##                                                           3rd Qu.:2021-08-20 10:05:23  
##                                                           Max.   :2021-08-22 09:32:53  
##                                                                                        
##       end                      latitude       longitude      accuracy      
##  Min.   :2021-07-31 13:21:20   Mode:logical   Mode:logical   Mode:logical  
##  1st Qu.:2021-08-18 09:25:01   NA's:2599      NA's:2599      NA's:2599     
##  Median :2021-08-19 08:54:47                                               
##  Mean   :2021-08-17 08:12:00                                               
##  3rd Qu.:2021-08-20 09:34:54                                               
##  Max.   :2021-08-22 09:32:48                                               
##  NA's   :565                                                               
##   old-value          new-value           time_spent         question        
##  Length:2599        Length:2599        Min.   :     0.0   Length:2599       
##  Class :character   Class :character   1st Qu.:     6.0   Class :character  
##  Mode  :character   Mode  :character   Median :    37.0   Mode  :character  
##                                        Mean   :   393.0                     
##                                        3rd Qu.:   185.8                     
##                                        Max.   :109257.0                     
##                                        NA's   :565                          
##  question_decoded   new_value_decoded  old_value_decoded  time_till_change
##  Length:2599        Length:2599        Length:2599        Min.   : 0.000  
##  Class :character   Class :character   Class :character   1st Qu.: 2.000  
##  Mode  :character   Mode  :character   Mode  :character   Median : 6.000  
##                                                           Mean   : 9.938  
##                                                           3rd Qu.:12.000  
##                                                           Max.   :54.000  
##                                                           NA's   :2534    
##  changed_from  
##  Mode:logical  
##  NA's:2599     
##                
##                
##                
##                
## 

3 General Information about the Data

no_inst = length(unique(df$`instance ID`))
no_event =  nrow(df)
earliest_start = as.Date(min(df$start)) 
latest_end = as.Date(max(df$end[!is.na(df$end)]))

Total number of instances: 53
Total number of events/questions: 2599
Examination period: 2021-07-31 - 2021-08-22

4 Grouped by Time

4.1 Events/Questions Started by Day

df_by_day <- df %>%
  mutate(start_date = as.Date(start)) %>%
  count(start_date, name = "count")

gg1 <- ggplot(df_by_day, aes(x = start_date, y = count)) +
  geom_line() +
  geom_smooth(alpha=0.5, colour="red", method="loess", se=F) +
  labs(title = "Number of Events/Questions Started by Day with Smoothed Regression Line", y =  "Number of Questions/Events Started", x = "Satrt Date") +
  theme_light() 
gg1

4.2 Questions/Events started by Weekday and Hour of the Day

df_wday_hour <- df %>%
  mutate(wday=wday(start, label=T, week_start = 1), hour=hour(start)) %>%
  count(wday, hour, name="count_wday_hour") %>%
  arrange(desc(wday))

theme_heatmap <- theme_light() +                 
  theme(panel.grid = element_blank(),            
        panel.border = element_blank(),          
        plot.title = element_text(face = "bold", size = 11, hjust = 0.5), 
        axis.ticks = element_blank(),            
        axis.title.x = element_blank(),        
        axis.title.y = element_text(size=10),   
        axis.text.y = element_text(size = 8),    
        axis.text.x = element_text(size = 10),   
        legend.position = "none")                

gg2 <- ggplot(df_wday_hour, aes(x = wday, y = hour, fill = count_wday_hour)) +
  geom_tile(colour="white") +  
  scale_fill_gradient(low = "#fff0f0", high="#940606") +  
  scale_y_reverse(breaks=c(23:0), labels=c(23:0), expand = c(0,0)) +               
  scale_x_discrete(expand = c(0,0), position = "top") +
  labs(title = "Number of Started Events/Questions by Day of Week / Hour of Day", y = "Hour of Day") +
  geom_text(aes(label = count_wday_hour), size = 2) +
  theme_heatmap  
gg2

4.3 Distribution of Time Spent per Event/Question with largest 5 % removed

df_clean = subset(df, time_spent<quantile(df$time_spent,0.95, na.rm=TRUE))

hist(df_clean$time_spent[!is.na(df_clean$time_spent)]/60, breaks=20, xlab = "Time Spent in Minutes", main = "Histogram of the Time Spent by Question")

5 Aggregated by Event/Question

5.1 Median Time Spent by Question

df_median_time_per_question <- df %>%
  filter(event=="question") %>%
  group_by(question_decoded) %>%
  summarise(median_time_spent = median(time_spent)) %>%
  arrange(desc(median_time_spent)) %>%
  mutate(median_time_spent = round(seconds_to_period(median_time_spent)))

df_median_time_per_question
question_decoded median_time_spent
cdsastart 8M 22S
comments 4M 49S
poxend 3M 23S
poxstart 2M 48S
step_end 1M 34S
If QR code scanning is not possible, please manually enter the participant identification code 1M 30S
Please scan the participant’s QR code 1M 17S
step_type 1M 13S
waiting_comment 51S
Please select the current district 39S
step_other 25S
fcode 18S
arrival 12S
cdsaend 10S

5.2 Count of Input Changes and Median Time until Input was Changed by Question

df_changes_per_question <- df %>%
  filter(event=="question", 
         !is.na(time_till_change)) %>%
  group_by(question_decoded) %>%
  summarise(count_input_changes=n(), 
            median_time_till_change=median(time_till_change), 
            sd_time_till_change=sd(time_till_change)) %>%
  arrange(desc(count_input_changes)) %>%
  mutate(median_time_till_change = round(seconds_to_period(median_time_till_change)),
         sd_time_till_change = round(seconds_to_period(sd_time_till_change), 1)) %>%
  filter(count_input_changes > 1)

df_changes_per_question
question_decoded count_input_changes median_time_till_change sd_time_till_change
comments 30 9S 15.5S
step_type 23 5S 6.1S
step_end 6 8S 5.3S
waiting_comment 3 6S 3.1S
If QR code scanning is not possible, please manually enter the participant identification code 2 0S 0.7S

5.3 Count of Old-New Value Pairs

df_stream <- df %>%
  filter(!is.na(time_till_change)) %>%
  count(question_decoded, 
        old_value_decoded, 
        new_value_decoded, 
        name="count_value_pairs", 
        sort=TRUE) %>%
  filter(count_value_pairs > 1)

df_stream
question_decoded old_value_decoded new_value_decoded count_value_pairs
step_end The child is exiting the facility The child is moving to the next step 6
step_type other walking 3
step_type other waiting 2
step_type waiting laboratory testing 2
step_type waiting walking 2
step_type walking other 2

6 Aggregated by Instance

6.1 Top 10 % of Duration by Instance

df_duration_per_inst <- df %>%
  group_by(`instance ID`) %>%
  summarise(duration_per_inst = max(end, na.rm=T) - min(start, na.rm=T)) %>% 
  filter(duration_per_inst>quantile(duration_per_inst, 0.9, na.rm=TRUE)) %>%
  mutate(duration_per_inst = round(seconds_to_period(duration_per_inst))) %>%
  arrange(desc(duration_per_inst))

df_duration_per_inst
instance ID duration_per_inst
uuid:40ecef80-d668-466a-8b6d-b1b3342af214 8d 14H 3M 16S
uuid:260dca23-57f3-43b8-8ee6-411783c89c45 2d 21H 36M 8S
uuid:63ba1773-d340-4ea0-bddb-03be8b8587dd 2d 12H 58M 6S
uuid:e3c416ae-1d97-4b44-8a82-0deb658a76c0 2d 8H 51M 45S
uuid:696d589b-0ed3-4c70-81d3-aee201655262 2d 1H 42M 35S
uuid:16b98e4f-ac5c-4b88-8de3-13f0c67e49f1 1d 22H 29M 12S

6.2 Distribution of Duration by Instance with Top 10 % excluded

df_subsetted <- df %>%
  group_by(`instance ID`) %>%
  summarise(duration_per_inst = max(end, na.rm=T) - min(start, na.rm=T)) %>%
  filter(duration_per_inst<quantile(duration_per_inst, 0.9, na.rm=TRUE))
 
hist(as.numeric(df_subsetted$duration_per_inst/60), breaks=30, main="Duration per Instance in Minutes (outliers removed)", xlab="Duration in Minutes")

7 Irregularities and Outliers

7.1 Time Till Change Outliers (for all data without removed outliers)

df_time_till_change_outliers <- df %>% 
  filter(time_till_change>quantile(df$time_till_change, 0.9, na.rm=TRUE)) %>% 
  arrange(desc(time_till_change)) %>%
  mutate(time_till_change = round(seconds_to_period(time_till_change))) %>%
  select(`instance ID`, 
         question_decoded, 
         old_value_decoded, 
         new_value_decoded, 
         time_till_change)

df_time_till_change_outliers
instance ID question_decoded old_value_decoded new_value_decoded time_till_change
uuid:75276ac5-ad78-41a5-84de-f78777ae34f8 comments Clinical officer delay to start registration and consultation because there was a meeting in consultation room.
Caregiver and patient return back to pharmacy for the second time because the nurse attendant did not understand properly what was written in prescription, therefore the nurse attendant request caregiver to return it to clinical officer to write it properly and return back. Caregiver and patient waited for a long time to enter in consultation room for registration because there was a meeting in the consultation room .

Clinical officer delayed to start registration because she had a meeting with visitors and staffs. Both registration and consultation conducted in the same room (consultation room). Caregiver and patient return back to pharmacy for the second time because the nurse attendant did not understand properly what was written in prescription, therefore the nurse attendant requested caregiver to return it to clinical officer to write it properly and return it to pharmacy. Clinical officer did not use pulse oximeter. Clinical officer did not use IMCI guideline. | 54S| |uuid:04bcbdca-b5b6-4189-992a-5ae3765c5441 |comments |The caregiver had 3 sick children, only 1 eligible for enrollment in TIMCI Registration service window opened at 0915hrs Time flow recorded after enrollment in TIMCI by RA Tb screening done because it’s the first time the patient is treated in the facility While waiting for 2nd consultation the clinician wasn’t in the consultation room until 1210hrs |The caregiver had 3 sick children, only 1 eligible for enrollment in TIMCI Registration service window opened at 0915hrs Time flow recorded after enrollment in TIMCI by RA Tb screening done because it’s the first time the patient is treated in the facility While waiting for 2nd consultation the clinician wasn’t in the consultation room until 1210hrs While waiting at the dispensing area there was no HCP until 1350hrs | 52S| |uuid:b77a9a81-74b2-406c-851f-b18c993e47e4 |comments |The patient moved from one step to another without having to wait for long because the were few patients until when the patient got to dispensing area where the medical attendant was out so they had to wait for sometime before being attended to |The patient moved from one step to another without having to wait for long because the were few patients until when the patient got to dispensing area where the medical attendant was out so they had to wait for sometime before being attended to They was also deviation because instead going to the dispensing area they went to timci research assistant for further documentation.Thus explaining double walking in steps 12 and 13 | 41S| |uuid:63ba1773-d340-4ea0-bddb-03be8b8587dd |comments |Dispensary crowded since it doesn’t open during the weekends CO not around due to administrative activities Consultation was done by EN instead of CO The EN started providing services at the ANC, followed by dressing room, then Consultation. Priority given to elderly patients Lab tests (MRDT and PITC) done in consultation room Dispensing of meds done by MA. |Dispensary crowded since it doesn’t open during the weekends CO not around due to administrative activities Consultation was done by EN instead of CO The EN started providing services at the ANC, followed by dressing room, then Consultation. Priority given to elderly patients Registration done in consultation room Lab tests (MRDT and PITC) done in consultation room Dispensing of meds done by MA. | 39S| |uuid:696d589b-0ed3-4c70-81d3-aee201655262 |comments |Due to the long waiting line the TIMCI RA checked if caregiver/patient are eligible for enrollment, verbal consent obtained for shadowing obtained - TIMCI enrollment done in co |-Due to the long waiting line the TIMCI RA checked if caregiver/patient are eligible for enrollment, verbal consent for shadowing obtained. - TIMCI enrollment done in the 5th step: consultation waiting area From 0929hrs to 0943hrs -In the 5th step there was no clinician in the consultation room until 1035hrs -Lab waiting area crowded, lab opened late, there were also lab referrals from community dispensaries - | 39S| |uuid:696d589b-0ed3-4c70-81d3-aee201655262 |comments |-Due to the long waiting line the TIMCI RA checked if caregiver/patient are eligible for enrollment, verbal consent for shadowing obtained. - TIMCI enrollment done in the 5th step: consultation waiting area From 0929hrs to 0943hrs -In the 5th step there was no clinician in the consultation room until 1035hrs -Lab waiting area crowded, lab opened late, there were also lab referrals from community dispensaries - |-Due to the long waiting line the TIMCI RA checked if caregiver/patient are eligible for enrollment, verbal consent for shadowing obtained. - TIMCI enrollment done in the 5th step: consultation waiting area From 0929hrs to 0943hrs -In the 5th step there was no clinician in the consultation room until 1035hrs -Lab waiting area crowded, lab opened late, there were also lab referrals from community dispensaries - In the 8th step: Lab waiting area, there was a HCP explaining on how to collect stool sample. | 31S| |uuid:887c48e5-4325-4c30-80bf-d2139b476991 |step_type |walking |other | 21S|

7.2 Histograms of Instances with Inconsistent Filling Behaviour

irregular_inst = c()
for (id in unique(df$`instance ID`)){
  bin_vec = cut(df$start[df$`instance ID`==id], 
                breaks=10, 
                labels=F)
  if (length(unique(bin_vec)) < 5) irregular_inst = c(irregular_inst, id)
}
paste0(length(irregular_inst), " out of ", length(unique(df$`instance ID`))," instances were found to have an inconsistent filling behaviour.")
## [1] "29 out of 53 instances were found to have an inconsistent filling behaviour."
last_bin_questions = c()
fig <- plot_ly(alpha=0.1)
for (id in irregular_inst){
  temp_df = df[df$`instance ID`==id,]
  temp_df$cut = cut(temp_df$start, breaks=10, labels=c("1. Part", "2. Part", "3. Part", "4. Part", "5. Part", "6. Part", "7. Part", "8. Part", "9. Part", "10. Part"))
  fig <- fig %>% add_histogram(x=temp_df$cut, name=id)
  
  last_bin_questions = c(last_bin_questions, temp_df$question_decoded[temp_df$cut=="10. Part"])
}
fig <- fig %>% layout(barmode = "overlay")
fig
kable(table(last_bin_questions) %>% as.data.frame() %>% arrange(desc(Freq)))
last_bin_questions Freq
comments 16
step_other 8
step_type 6
waiting_comment 2
If QR code scanning is not possible, please manually enter the participant identification code 1